setwd("~/Dropbox/Papers/MarginalEffects")
library(interflex)
library(ggplot2)
library(reshape2)
library(truncnorm)
set.seed(14850)



ns <- seq(100,2000,300)
errors <- seq(1,5,.5)
beta1s <- seq(0,2.5,.25)
alphas <- seq(0,2.5,.5)
alpha <- 1
beta1 <- 1
beta2 <- 1
n <- 500
error <- 4
names <- c("n","false.plot","false.plot2","false.bin","false.bin2","false.p","ar2")



# interaction plot with no interaction effect
no_interact <- function(n,error,alpha,beta1,beta2){
  x <- rbinom(n,1,.5)
  z <- rnorm(n)
  y <- alpha + beta1*x + beta2*z + rnorm(n,0,error)

  fit <- lm(y~x*z)
  s <- cbind.data.frame(D=x, X=z, Y=y)
  bins <- inter.binning(Y = "Y", D = "D", X = "X", data = s)
  bins.comp <- bins$p.twosided
  bins <- bins$est.binning

  ar2 <- summary(fit)$adj.r.squared
  
  b1 <- coef(fit)[2]
  b3 <- coef(fit)[length(coef(fit))]
  vb1 <- as.matrix(vcov(fit))[2,2]
  vb3 <- as.matrix(vcov(fit))[length(coef(fit)),length(coef(fit))]
  vb1b3 <- as.matrix(vcov(fit))[2,length(coef(fit))]
  mv <- seq(min(z),max(z),length.out=100)
  
  conbx <- b1+b3*mv
  consx <- sqrt(vb1+vb3*(mv^2)+2*vb1b3*mv)
  
  upperx <- conbx+(1.96*consx)
  lowerx <- conbx-(1.96*consx)
  
  #investigate marginal effects: crosses zero heuristic
  dat <- as.data.frame(cbind(mv,conbx,lowerx,upperx))
  if ((dat[dat$mv > quantile(z,.75),]$lowerx[1] < 0)&
      (dat[dat$mv>=quantile(z,.25),]$lowerx[1] > 0)&
      (dat[dat$mv>=quantile(z,.025),]$lowerx[1] > 0)) {
    zero <- 1
  } else if ((dat[dat$mv > quantile(z,.25),]$lowerx[1] < 0)&
             (dat[dat$mv>=quantile(z,.75),]$lowerx[1] > 0)&
             (dat[dat$mv > quantile(z,.975),]$lowerx[1] > 0)) {
    zero <- 1
  } else {zero <- 0}
  
  #investigate marginal effects plots: compare extremes heuristic
  if (max(lowerx) > min(upperx)) {
    extremes <- 1
  } else {extremes <- 0} 
  
  #investigate binning estimator: compare order
  if ((bins[1,4] < 0 )&
      (bins[3,4] > 0 )&
      (bins[3,2] > bins[1,2])) {
    bin <- 1
  } else if ((bins[3,4] < 0 )&
             (bins[1,4] > 0 )&
             (bins[1,2] > bins[3,2])) {
    bin <- 1
  } else {bin <- 0} 
  
  #investigate binning estimator: compare 1 and 3
  if (bins.comp[3] > .05) {bin2 <- 0} else {bin2 <- 1}
  
  #investigate coefficient on b3
  pvalue <- coef(summary(fit))[length(coef(fit)),4]
  if (pvalue < .05) {star <- 1} else {star <- 0}
  
  return(c(n,zero,extremes,bin,bin2,star,ar2))
}



to.sim <- function(n,error,alpha,beta1,beta2) {
  sims <- t(replicate(1000,no_interact(n,error,alpha,beta1,beta2)))
  #sims <- data.frame(cbind(sims,rep(NA,nrow(sims))))
  #colnames(sims) <- c(names,"cond.zero")
  #sims$cond.zero[sims$false.p==0] <- sims$false.plot[sims$false.p==0]
  #colMeans(na.omit(sims))
  return(colMeans(sims, na.rm=TRUE))
}

vary.n <- lapply(ns,to.sim,error=error, alpha=alpha, beta1=beta1, beta2=beta2)
vary.n <- data.frame(matrix(unlist(vary.n),ncol = 7, byrow = TRUE))
names(vary.n) <- names
vary.n
write.csv(vary.n, file="vary.n", row.names=FALSE)
vary.n <- read.csv("vary.n")
vary.n.melt <- melt(vary.n, id.vars = c("n","ar2"), 
                    measure.vars = c("false.plot", "false.plot2","false.bin","false.bin2","false.p"))
p <- ggplot(data=vary.n.melt, aes(x=n, y=value, group=variable, color=variable)) + geom_line() + geom_point()
p + geom_hline(yintercept=0.05) + scale_color_discrete(name="", labels=c("Marginal Effects Plot (1)", 
                                                                         "Marginal Effects Plot (2)",
                                                                         "Binning Estimator (1)",
                                                                         "Binning Estimator (2)",
                                                                         "Coefficient")) +
  theme_classic() + labs(x = "N", y="Probability of Type-1 Error")

vary.error <- lapply(errors,to.sim,n=n, alpha=alpha, beta1=beta1, beta2=beta2)
vary.error <- data.frame(matrix(unlist(vary.error),ncol = 7, byrow = TRUE))
vary.error <- cbind(vary.error,errors/1.25)
names(vary.error) <- c(names,"var.rat")
write.csv(vary.error, file="vary.error", row.names=FALSE)
vary.error <- read.csv("vary.error")
vary.error.melt <- melt(vary.error, id.vars = c("n","ar2","var.rat"), 
                        measure.vars = c("false.plot", "false.plot2","false.bin","false.bin2","false.p"))
p <- ggplot(data=vary.error.melt, aes(x=var.rat, y=value, group=variable, color=variable)) + 
  geom_line(size = 1) + geom_point(size = 2)
p + geom_hline(yintercept=0.05) + scale_color_discrete(name="", labels=c("Marginal Effects Plot (1)", 
                                                                         "Marginal Effects Plot (2)",
                                                                         "Binning Estimator (1)",
                                                                         "Binning Estimator (2)",
                                                                         "Coefficient")) +
  theme_classic() + labs(x = "Ratio of Stochastic to Systematic Variance", y="Probability of Type-1 Error") 

vary.b1b2 <- lapply(beta1s,to.sim,n=n, error=error, alpha=alpha, beta2=beta2)
vary.b1b2 <- data.frame(matrix(unlist(vary.b1b2),ncol = 7, byrow = TRUE))
names(vary.b1b2) <- names
vary.b1b2 <- cbind(vary.b1b2,beta1s)
write.csv(vary.b1b2, file="vary.b1b2", row.names=FALSE)
vary.b1b2 <- read.csv("vary.b1b2")
vary.b1b2.melt <- melt(vary.b1b2, id.vars = c("n","ar2","beta1s"), 
                       measure.vars = c("false.plot", "false.plot2","false.bin","false.bin2","false.p"))
p <- ggplot(data=vary.b1b2.melt, aes(x=beta1s, y=value, group=variable, color=variable)) + 
  geom_line(size = 1) + geom_point(size = 2)
p + geom_hline(yintercept=0.05) + scale_color_discrete(name="", labels=c("Marginal Effects Plot (1)", 
                                                                         "Marginal Effects Plot (2)",
                                                                         "Binning Estimator (1)",
                                                                         "Binning Estimator (2)",
                                                                         "Coefficient")) +
  theme_classic() + labs(x = "Beta1/Beta2", y="Probability of Type-1 Error") 

vary.alpha <- lapply(alphas,to.sim,n=n, error=error, beta1=beta1, beta2=beta2)
vary.alpha <- data.frame(matrix(unlist(vary.alpha),ncol = 7, byrow = TRUE))
names(vary.alpha) <- names
vary.alpha <- cbind(vary.alpha,alphas)
write.csv(vary.alpha, file="vary.alpha", row.names=FALSE)
vary.alpha <- read.csv("vary.alpha")
vary.alpha.melt <- melt(vary.alpha, id.vars = c("n","ar2","alphas"), 
                        measure.vars = c("false.plot", "false.plot2","false.bin","false.bin2","false.p"))
p <- ggplot(data=vary.alpha.melt, aes(x=alphas, y=value, group=variable, color=variable)) + 
  geom_line(size = 1) + geom_point(size = 2)
p + geom_hline(yintercept=0.05) + scale_color_discrete(name="", labels=c("Marginal Effects Plot (1)", 
                                                                         "Marginal Effects Plot (2)",
                                                                         "Binning Estimator (1)",
                                                                         "Binning Estimator (2)",
                                                                         "Coefficient")) +
  theme_classic() + labs(x = "Alpha", y="Probability of Type-1 Error") 




# interaction plot with a linear interaction effect
linear_interact <- function(n,error,alpha,beta1,beta2){
  x <- rbinom(n,1,.5)
  z <- rtruncnorm(n,a=-1.5)
  y <- alpha + beta1*x + beta2*z + 1*x*z + rnorm(n,0,error)
  
  fit <- lm(y~x*z)
  s <- cbind.data.frame(D=x, X=z, Y=y)
  bins <- inter.binning(Y = "Y", D = "D", X = "X", data = s)
  bins.comp <- bins$p.twosided
  bins <- bins$est.binning
  
  ar2 <- summary(fit)$adj.r.squared
  
  b1 <- coef(fit)[2]
  b3 <- coef(fit)[length(coef(fit))]
  vb1 <- as.matrix(vcov(fit))[2,2]
  vb3 <- as.matrix(vcov(fit))[length(coef(fit)),length(coef(fit))]
  vb1b3 <- as.matrix(vcov(fit))[2,length(coef(fit))]
  mv <- seq(min(z),max(z),length.out=100)
  
  conbx <- b1+b3*mv
  consx <- sqrt(vb1+vb3*(mv^2)+2*vb1b3*mv)
  
  upperx <- conbx+(1.96*consx)
  lowerx <- conbx-(1.96*consx)
  
  #investigate marginal effects: crosses zero heuristic
  dat <- as.data.frame(cbind(mv,conbx,lowerx,upperx))
  if ((dat[dat$mv > quantile(z,ptruncnorm(-1,a=-1.5)),]$lowerx[1] < 0)&
             (dat[dat$mv>=quantile(z,.75),]$lowerx[1] > 0)) {
    zero <- 1
  } else {zero <- 0}
  
  #investigate marginal effects plots: compare extremes heuristic
  if (max(lowerx) > min(upperx)) {
    extreme <- 1
  } else {extreme <- 0} 
  
  #investigate binning estimator: compare order
  if ((bins[1,4] < 0 )&
      (bins[3,4] > 0 )&
      (bins[3,2] > bins[1,2])) {
    bin <- 1
  } else if ((bins[3,4] < 0 )&
             (bins[1,4] > 0 )&
             (bins[1,2] > bins[3,2])) {
    bin <- 1
  } else {bin <- 0} 
  
  #investigate binning estimator: compare 1 and 3
  if (bins.comp[3] > .05) {bin2 <- 0} else {bin2 <- 1}
  
  #investigate coefficient on b3
  pvalue <- coef(summary(fit))[length(coef(fit)),4]
  if (pvalue < .05) {star <- 1} else {star <- 0}
  
  return(c(n,zero,extreme,bin,bin2,star,ar2))
}



to_sim_le <- function(n,error,alpha,beta1,beta2) {
  sims <- t(replicate(1000,linear_interact(n,error,alpha,beta1,beta2)))
  #sims <- data.frame(cbind(sims,rep(NA,nrow(sims))))
  #colnames(sims) <- c(names,"cond.zero")
  #sims$cond.zero[sims$false.p==0] <- sims$false.plot[sims$false.p==0]
  #colMeans(na.omit(sims))
  return(colMeans(sims, na.rm=TRUE))
}


vary.n <- lapply(ns,to_sim_le,error=error, alpha=alpha, beta1=beta1, beta2=beta2)
vary.n <- data.frame(matrix(unlist(vary.n),ncol = 7, byrow = TRUE))
names(vary.n) <- names
vary.n <- cbind(vary.n[1],1-vary.n[2:6],vary.n[7])
write.csv(vary.n, file="vary.n.le", row.names=FALSE)
vary.n <- read.csv("vary.n.le")
vary.n.melt <- melt(vary.n, id.vars = c("n","ar2"), 
                    measure.vars = c("false.plot", "false.plot2","false.bin", "false.bin2", "false.p"))
p <- ggplot(data=vary.n.melt, aes(x=n, y=value, group=variable, color=variable)) + geom_line() + geom_point()
p + geom_hline(yintercept=0.05) + scale_color_discrete(name="", labels=c("Marginal Effects Plot (1)", 
                                                                         "Marginal Effects Plot (2)",
                                                                         "Binning Estimator (1)",
                                                                         "Binning Estimator (2)",
                                                                         "Coefficient")) +
  theme_classic() + labs(x = "N", y="Probability of Type-2 Error")


vary.error <- lapply(errors,to_sim_le,n=n, alpha=alpha, beta1=beta1, beta2=beta2)
vary.error <- data.frame(matrix(unlist(vary.error),ncol = 7, byrow = TRUE))
vary.error <- cbind(vary.error,errors/1.25)
names(vary.error) <- c(names,"var.rat")
vary.error <- cbind(vary.error[1],1-vary.error[2:6],vary.error[7:8])
write.csv(vary.error, file="vary.error.le", row.names=FALSE)
vary.error <- read.csv("vary.error.le")
vary.error.melt <- melt(vary.error, id.vars = c("n","ar2","var.rat"), 
                        measure.vars = c("false.plot", "false.plot2","false.bin", "false.bin2", "false.p"))
p <- ggplot(data=vary.error.melt, aes(x=var.rat, y=value, group=variable, color=variable)) + 
  geom_line(size = 1) + geom_point(size = 2)
p + geom_hline(yintercept=0.05) + scale_color_discrete(name="", labels=c("Marginal Effects Plot (1)", 
                                                                         "Marginal Effects Plot (2)",
                                                                         "Binning Estimator (1)",
                                                                         "Binning Estimator (2)",
                                                                         "Coefficient")) +
  theme_classic() + labs(x = "Ratio of Stochastic to Systematic Variance", y="Probability of Type-2 Error") 





# interaction plot with a nonlinear interaction effect
nonlinear_interact <- function(n,error,alpha,beta1,beta2){
  x <- rbinom(n,1,.5)
  z <- rtruncnorm(n, a=-1.5)
  e <- rnorm(n,0,error)
  y <- rep(NA,n)
  y[z<(-.5)] <- alpha + 0*x[z<(-.5)] + z[z<(-.5)] + e[z<(-.5)]
  y[z>=(-.5)] <- alpha + 5*(z[z>=(-.5)]^2)*x[z>=(-.5)] + z[z>=(-.5)] + e[z>=(-.5)]
  
  fit <- lm(y~x*z)
  s <- cbind.data.frame(D=x, X=z, Y=y)
  bins <- inter.binning(Y = "Y", D = "D", X = "X", data = s)
  bins.comp <- bins$p.twosided
  bins <- bins$est.binning
  
  ar2 <- summary(fit)$adj.r.squared
  
  b1 <- coef(fit)[2]
  b3 <- coef(fit)[length(coef(fit))]
  vb1 <- as.matrix(vcov(fit))[2,2]
  vb3 <- as.matrix(vcov(fit))[length(coef(fit)),length(coef(fit))]
  vb1b3 <- as.matrix(vcov(fit))[2,length(coef(fit))]
  mv <- seq(min(z),max(z),length.out=100)
  
  conbx <- b1+b3*mv
  consx <- sqrt(vb1+vb3*(mv^2)+2*vb1b3*mv)
  
  upperx <- conbx+(1.96*consx)
  lowerx <- conbx-(1.96*consx)
  
  #investigate marginal effects: crosses zero heuristic
  dat <- as.data.frame(cbind(mv,conbx,lowerx,upperx))
  if ((dat[dat$mv > quantile(z,ptruncnorm(0,a=-1)),]$lowerx[1] < 0)&
      (dat[dat$mv>=quantile(z,ptruncnorm(-.5,a=-1)),]$upperx[1] > 0)&
      (dat[dat$mv>=quantile(z,.75),]$lowerx[1] > 0)) {
    zero <- 1
  } else {zero <- 0}
  
  #investigate binning estimator
  if ((bins[1,4] < 0 )&
      (bins[1,5] > 0 )&
      (bins[3,4] > 0)) {
    bin <- 1
  } else {bin <- 0} 
  
  #investigate binning estimator: compare 1 and 3
  if (bins.comp[3] > .05) {bin2 <- 0} else {bin2 <- 1}
  
  #investigate coefficient on b3
  pvalue <- coef(summary(fit))[length(coef(fit)),4]
  if (pvalue < .05) {star <- 1} else {star <- 0}
  
  return(c(n,zero,bin,bin2,star,ar2))
}



to_sim_nle <- function(n,error,alpha,beta1,beta2) {
  sims <- t(replicate(1000,nonlinear_interact(n,error,alpha,beta1,beta2)))
  #sims <- data.frame(cbind(sims,rep(NA,nrow(sims))))
  #colnames(sims) <- c(names,"cond.zero")
  #sims$cond.zero[sims$false.p==0] <- sims$false.plot[sims$false.p==0]
  #colMeans(na.omit(sims))
  return(colMeans(sims, na.rm=TRUE))
}


vary.n <- lapply(ns,to_sim_nle,error=error, alpha=alpha, beta1=beta1, beta2=beta2)
vary.n <- data.frame(matrix(unlist(vary.n),ncol = 6, byrow = TRUE))
write.csv(vary.n, file="vary.n.nle", row.names=FALSE)
names(vary.n) <- c("n","false.plot","false.bin","false.bin2","false.p","ar2")
write.csv(vary.n, file="vary.n.nle", row.names=FALSE)
vary.n <- read.csv("vary.n.nle")
vary.n.melt <- melt(vary.n, id.vars = c("n","ar2"), 
                    measure.vars = c("false.plot","false.bin","false.bin2","false.p"))
p <- ggplot(data=vary.n.melt, aes(x=n, y=value, group=variable, color=variable)) + geom_line() + geom_point()
p + geom_hline(yintercept=0.05) + scale_color_discrete(name="", labels=c("Marginal Effects Plot (1)", 
                                                                         "Binning Estimator (1)",
                                                                         "Binning Estimator (2)",
                                                                         "Coefficient")) +
  theme_classic() + labs(x = "N", y="Probability of Detecting the 'Correct' Interaction Effect")
